**************************************************************************************************************
**	Replication do-file for "Governmment Policies and the Emergence of an Ethnic Dimension in Party Systems"
**	Author: Maayan Mor
**	Purpose: Create "state_wide.dta," the Prussian election dataset. 
**************************************************************************************************************

clear all
set more off

/* SET DIRECTORY */

************************************
**		Liberal vote share   1863
************************************
import excel "Liberal vote share 1863.xlsx", sheet("Sheet1") firstrow clear

drop if Kuehne_ID=="" | Kuehne_ID=="29-44" // drop missing observations and one artificial Berlin constituency

destring Kuehne_ID, replace
drop if Kuehne_ID>145 & Kuehne_ID<165 // Schleswig-Holstein
drop if Kuehne_ID>164 & Kuehne_ID<201 // Hannover
drop if Kuehne_ID>222 & Kuehne_ID<249 // Hesse-Nasau
drop if Kuehne_ID==285                // Sigmaringen
save "all csts 1863.dta", replace 

**		Berlin had elections in four constituencies but census data are reported
**		only for the entire city, so the results need to be aggregated to a
**		single synthetic constituency. 
drop if Kuehne_ID>28 & Kuehne_ID<34 /* Berlin */

save "all csts 1863 without Berlin.dta", replace 

use "all csts 1863.dta", replace
keep if Kuehne_ID>28 & Kuehne_ID<34 /* keep only Berlin */
recode Kuehne_ID (30/32=29) /* recode Berlin into a single synthetic constituency */
collapse (mean) conservative63 liberal63 (sum) DM, by(Kuehne_ID) /*take the average vote share*/

append using "all csts 1863 without Berlin.dta"

save "Liberal vote share 1863.dta", replace

****************************************
**		Election results 1867-1870-1873
****************************************
import excel "Election results Kuehne handbook.xlsx", ///
sheet("Sheet1") firstrow clear

drop if cst_k==. // drop empty cells
recode year (1867=67) (1870=70) (1873=73)

** code missing vote share to missing
replace vs=" " if vs=="N/A"
destring vs, replace

** 	Include only first round elections. If there was no information about the 
**	first round of elections, use the information from the second round. 
replace round="1" if round=="1*" | round=="2*"
destring round, replace

**	keep only the first election round
keep if round==1

** keep only general elections (Allgemeine Wahl)
keep if wahl=="AL"

** recode the vote shares of candidates from the same party family that competed against 
** one another. Clean the * and _ from the names of parties
replace party = regexr(party, "_", "")

replace party="F" if party=="F vs F"
replace party="N" if party=="N vs N"
replace party="R" if party=="R vs R"
replace party="R" if party=="R vs R*"
replace party="R" if party=="R* vs R"
replace party="Z" if party=="Z vs Z"

replace party="F" if party=="F*"
replace party="L" if party=="L*"
replace party="N" if party=="N*"
replace party="R" if party=="R*"
replace party="Z" if party=="Z*"

save "biographies 67-73.dta", replace /*this will be used later for Figure 1*/

** code parties into party families
gen party_family="other"
replace party_family="liberal all" if party=="F" | party=="L" | party=="LC" | party=="N" | party=="AL"
replace party_family="conservative" if party=="K" | party=="KN" | party=="KA" | party=="R" | party=="RZ"
replace party_family="center" if party=="Z"
label var party_family "party family 4 categories"

**  In some constituencies, liberal candidates competed against one another. 
**	Calculate the average vote share by constituency-year for each party family
egen cst_year_pty=concat(party_family cst_k year)
egen vs_family=mean(vs), by(cst_year_pty)

foreach i of numlist 67 70 73{
gen liberal`i'=vs_family if party_family=="liberal all" & year==`i'
gen conserv`i'=vs_family if party_family=="conservative" & year==`i'
gen center`i'=vs_family if party_family=="center" & year==`i'
gen other`i'=vs_family if party_family=="other" & year==`i'
}

collapse liberal67-other73 urban  baseline_sample, by(cst_k)

** In case all the candidates in the district were from the same party family, code vote share to 100
foreach j of numlist 67 70 73{
replace liberal`j'=100 if conserv`j'==. & center`j'==. & other`j'==. 
replace conserv`j'=100 if liberal`j'==. & center`j'==. & other`j'==. 
replace center`j'=100 if conserv`j'==. & liberal`j'==. & other`j'==. 
}

rename cst_k Kuehne_ID

save "1867-73 only Berlin.dta", replace 

**	Calculate vote shares for Berlin and then collapse into one synthetic constituency
drop if Kuehne_ID>28 & Kuehne_ID<34
save "1867-73 without Berlin.dta", replace 

use "1867-73 only Berlin.dta", replace
keep if Kuehne_ID>28 & Kuehne_ID<34
recode conserv67 conserv73 (.=0) if Kuehne_ID==30 | Kuehne_ID==31 | Kuehne_ID==32 
recode Kuehne_ID (30/32=29)
collapse (mean) liberal67-other73 urban, by(Kuehne_ID)

append using "1867-73 without Berlin.dta"
replace baseline_sample=1 if Kuehne_ID==29

sort Kuehne_ID
save "election data 67-73.dta", replace

******************************
** Distance from Wittenberg 
******************************
import excel using "wittenberg.xlsx", sheet("Sheet1") firstrow clear

keep Kh_ID distance
rename Kh_ID Kuehne_ID 
egen wittenberg=mean(distance), by(Kuehne_ID)
duplicates drop Kuehne_ID wittenberg, force
label var wittenberg "Wittenberg"

save "distance from Wittenberg.dta", replace

*************************
** Distance from Paris 
*************************
import excel "distance from Paris.xlsx", sheet("Sheet1") firstrow clear

keep Kh_ID distance
rename Kh_ID Kuehne_ID 
egen paris=mean(distance), by(Kuehne_ID)
duplicates drop Kuehne_ID paris, force
label var paris "Paris"

save "distance from paris.dta", replace

**********************************
** Census data from iPEHD 
**********************************
import excel "kuhne kreiskey1864.xls", sheet("kuhene kreiskey1864") firstrow clear 
keep kreiskey1864 kuehne_id rb regierungsbezirk province
rename kuehne_id Kuehne_ID
save "kuhne kreiskey1864.dta", replace

*** 1864 data
import excel "ipehd_1864_rel_deno.xlsx", sheet("ipehd_1864_rel_deno") firstrow clear 
keep  county rb rel1864_cat kreiskey1864
save "ipehd_1864_population.dta", replace

import excel "ipehd_1864_occ_indu.xls", sheet("ipehd_1864_occ_indu") firstrow clear
save "ipehd_1864_occ_indu.dta", replace

import excel "ipehd_1864_pop_demo.xls", sheet("ipehd_1864_pop_demo") firstrow clear
keep kreiskey1864 county rb	pop1864_tot pop1864_civ 
save "ipehd_1864_pop_demo", replace

use "ipehd_1864_population.dta"

merge 1:1 kreiskey1864 using "ipehd_1864_occ_indu.dta"
rename _merge merge1864_2 // all match

merge 1:1 kreiskey1864 using "ipehd_1864_pop_demo.dta"
rename _merge merge1864_3 // all match

drop if kreiskey1864>343 // those are counties that are a part of Sigmaringen; not included in the analysis
drop merge*

save "ipehd_1864.dta", replace

** 1849 data
import excel "ipehd_1849_rel_church.xls", sheet("ipehd_1849_rel_church") firstrow clear
keep kreiskey1849 rel1849_cat_priest rel1849_cat_chaplain_vicar rel1849_cat_main_church rb county

save "ipehd_1849_rel_church.dta", replace

import excel "ipehd_1849_pop_demo.xls", sheet("ipehd_1849_pop_demo") firstrow clear
keep kreiskey1849 rb pop1849_tot pop1849_m_8to14 pop1849_f_8to14
save "ipehd_1849_pop_demo.dta", replace

import excel "ipehd_1849_rel_deno.xls", sheet("ipehd_1849_rel_deno") firstrow clear
keep kreiskey1849 rb rel1849_cat

save "ipehd_1849_rel_deno.dta", replace

merge 1:1 kreiskey1849 using "ipehd_1849_pop_demo.dta"
rename _merge merge_1849 // all merged
drop merge_1849

merge 1:1 kreiskey1849 using "ipehd_1849_rel_church.dta"  // all merged
drop _merge
save "ipehd_1849.dta", replace

/*****************************************************************
**	Calculate all of the variables at the 1864 Kreis level

There are five 1849 counties that were split in 1864. 
So I am merging many to 1. 

123 Randow split into Randow and Stettin: split in two
295 parts of Duisburg became Essen Landkreis but this should be fine: split in two
296 Geldern split into Geldern and Mors: split in two
300 Elberfeld 294 split into Baremen and Mettman and Elberfeld: split into three
302 Elberfeld 294 split into Baremen and Mettman and Elberfeld: split into three
*****************************************************************/
insheet using "ipehd_merge_county.csv", clear  
drop if kreiskey1849==. 
merge m:1 kreiskey1849 using "ipehd_1849.dta"  
rename _merge merge1849_kreiskey 
collapse (sum) rel1849_cat_main_church rel1849_cat_priest pop1849_tot ///
rel1849_cat_chaplain_vicar rel1849_cat pop1849_m_8to14 pop1849_f_8to14 ///
, by(kreiskey1864)
save "ipehd_1849_in_1864.dta", replace

******************************************************************
**	Now merge all of the files with the master coversion key
******************************************************************
use "ipehd_1864.dta", clear

merge 1:1 kreiskey1864 using "ipehd_1849_in_1864.dta" // all merge
rename _merge merge1849  
drop merge*

merge 1:1 kreiskey1864 using "kuhne kreiskey1864.dta"
drop if kreiskey1864>343 // Sigmaringen

collapse (sum) occ1864_ind pop* rel*, by(Kuehne_ID)

save "ipehd_data.dta", replace

********************************************
** now isolate kuehne_id, rb, and province
********************************************
use "kuhne kreiskey1864.dta", clear
duplicates drop Kuehne_ID rb regierungsbezirk province, force
drop if Kuehne_ID==285
save "kuhne_id rb province.dta", replace

************************
**  Merge all files
************************
use "ipehd_data.dta", clear

merge 1:1 Kuehne_ID using "kuhne_id rb province.dta" /* all merged */

rename _merge mergerb
rename rb rb_abbrv

merge 1:1 Kuehne_ID using "Liberal vote share 1863.dta" /* all merged */

rename _merge merge63

merge 1:1 Kuehne_ID using "election data 67-73.dta" /* all merged */ 

**		drop constituencies that were not a part of Prussia before 1864
drop if Kuehne_ID>145 & Kuehne_ID<165 // Schleswig-Holstein
drop if Kuehne_ID>164 & Kuehne_ID<201 // Hannover
drop if Kuehne_ID>222 & Kuehne_ID<249 // Hesse-Nasau

drop _merge

*********************************************************************
** Monasteries. This is merged based on administrative district (RB)
*********************************************************************
merge m:1 regierungsbezirk using "priests churches and monastaries.dta", keepusing(monastaries)
drop _merge /* all merged */

***********************************************
** code years of napoleonic rule
***********************************************
gen napo=0
replace napo=6 if province=="westfalen" | regierungsbezirk=="Erfurt" ///
| regierungsbezirk=="Magdeburg" | regierungsbezirk=="Merseburg"
replace napo=19 if province=="rhineprovinz"

******************************
** merge distance from Paris
******************************
merge 1:1 Kuehne_ID using "distance from paris.dta" 
drop _merge /* all merged */

***********************************
** merge distance from Wittenberg 
***********************************
merge 1:1 Kuehne_ID using "distance from Wittenberg.dta" 
drop _merge /* all merged */

***************************
** Create new variables
***************************
gen cathp_64=(rel1864_cat/pop1864_tot)*100
label var cathp_64 "1864: % Catholics"

gen cathp_49=(rel1849_cat/pop1849_tot)*100
label var cathp_49  "1849; Percentage Catholics"

gen industry=(occ1864_ind/pop1864_tot)*100
label var industry "1864: % employed in Industry"

gen cath_th49=rel1849_cat/10000
label var cath_th49  "1849; Catholic residents divided by 10,000"

gen priest_vicar=(rel1849_cat_priest+rel1849_cat_chaplain_vicar)/cath_th49
label var priest_vicar "1849; Catholic vicars and priests for 10,000 Catholics"

egen cath_rb=total(rel1864_cat), by(regierungsbezirk)
label var cath_rb "1864; total Catholics in Regierungsbezirk"

gen cath_rb_hun_th=cath_rb/100000
label var cath_rb_hun_th "1864; Catholic residents in RB divided by 100,000"

gen monastery=monastaries/cath_rb_hun_th
label var monastery "1864; number of monasteries for 100,000 Catholics"

gen gap6370_lib=liberal70-liberal63
label var gap6370_lib "Change in Liberal vote share 63-70"

gen gap6373_lib=liberal73-liberal63
label var gap6373_lib "Change in Liberal vote share 63-73"

gen gap6770_lib=liberal70-liberal67
label var gap6770_lib "Change in Liberal vote share 67-70"

gen gap6773_lib=liberal73-liberal67
label var gap6773_lib "Change in Liberal vote share 67-73"

gen gap6367_lib=liberal63-liberal67
label var gap6367_lib "Change in Liberal vote share 63-67"

gen priest2=priest_vicar*priest_vicar
ge monas2=monastery*monastery

gen DM_t=DM
recode DM_t (9=3)

replace urban=1 if urban>0

encode province, gen(pv)

label var industry "Industry"
label var napo "Napoleon"
label var wittenberg "Wittenberg"
label var priest_vicar "Priests"
label var paris "Paris"
label var DM_t "DM"
label var cathp_64 "% Catholics"
label var monastery "Monasteries"
label var urban "City"

recode baseline_sample (1=0)(0=1) /*reverse the order so that baseline_sample==0*/ 

foreach j of numlist 67 70 73{
label var liberal`j' "`j'; liberal vote share"
label var conserv`j' "`j'; conservative vote share "
label var center`j' "`j'; center vote share"
label var other`j' "`j'; other parties vote share"
}

replace paris=paris*100
replace wittenberg=wittenberg/1000

save "state wide.dta", replace

